# Load the 2014/2015 CSM workbook from the working directory and preview
# its first six rows.
mdata <- readxl::read_xlsx(path = "./2014 and 2015 CSM dataset.xlsx")
head(mdata, n = 6L)
## # A tibble: 6 x 14
## Movie Year Ratings Genre Gross Budget Screens Sequel Sentiment Views
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 13 S… 2014 6.3 8 9.13e3 4.00e6 45 1 0 3.28e6
## 2 22 J… 2014 7.1 1 1.92e8 5.00e7 3306 2 2 5.83e5
## 3 3 Da… 2014 6.2 1 3.07e7 2.80e7 2872 1 0 3.05e5
## 4 300:… 2014 6.3 1 1.06e8 1.10e8 3470 2 0 4.53e5
## 5 A Ha… 2014 4.7 8 1.73e7 3.50e6 2310 2 0 3.15e6
## 6 A Lo… 2014 4.6 3 2.90e4 5.00e5 NA 1 0 9.11e4
## # … with 4 more variables: Likes <dbl>, Dislikes <dbl>, Comments <dbl>,
## # `Aggregate Followers` <dbl>
#glimpse(mdata)
# dplyr has to be attached before the pipeline below runs: it supplies both
# the `%>%` pipe and mutate(). Attaching it afterwards fails in a fresh session.
library(dplyr)

# Copy of the raw data with Gross_log, the natural log of Gross, appended.
mdata_transform <- mdata %>%
  mutate(Gross_log = log(Gross))
library(dataMaid)
## Warning: package 'dataMaid' was built under R version 3.5.2
##
## Attaching package: 'dataMaid'
## The following object is masked from 'package:dplyr':
##
## summarize
library(tidyverse)
## ── Attaching packages ──────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ readr 1.1.1
## ✔ tibble 2.1.1 ✔ purrr 0.2.5
## ✔ tidyr 1.0.0 ✔ stringr 1.3.1
## ✔ ggplot2 3.2.1 ✔ forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.5.2
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## ── Conflicts ─────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks skimr::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dataMaid::summarize() masks dplyr::summarize()
# "Not in" operator: TRUE for every element of `x` that has no match in `table`.
`%!in%` <- function(x, table) {
  !(x %in% table)
}
# Encode the categorical columns as factors and derive isSequel, a
# FALSE/TRUE factor that is TRUE when Sequel is greater than 1.
mdata_encoded <- mutate(
  mdata_transform,
  Year = as.factor(Year),
  Genre = as.factor(Genre),
  isSequel = as.factor(Sequel > 1)
)
#PDF Output
#makeCodebook(mdata_encoded,replace = TRUE)
#makeDataReport(mdata_encoded,replace = TRUE)
#clean(mdata_encoded,replace = TRUE)
# Draw one plot per variable of the encoded data set
# (visualize() presumably comes from dataMaid, attached above).
visualize(mdata_encoded,replace = TRUE)
# Run the per-variable checks (missing values, outliers, sparse levels, ...);
# the printed result is recorded in the `##` lines that follow.
check(mdata_encoded,replace = TRUE)
## $Movie
## $Movie$identifyMissing
## No problems found.
## $Movie$identifyWhitespace
## No problems found.
## $Movie$identifyLoners
## Note that the following levels have at most five observations: 13 Sins, 22 Jump Street, 3 Days to Kill, 300: Rise of an Empire, A Haunted House 2, A Long Way Off, A Million Ways to Die in the West, A Most Violent Year, A Walk Among the Tombstones, About Last Night (221 additional values omitted).
## $Movie$identifyCaseIssues
## No problems found.
## $Movie$identifyNums
## No problems found.
##
## $Year
## $Year$identifyMissing
## No problems found.
## $Year$identifyWhitespace
## No problems found.
## $Year$identifyLoners
## No problems found.
## $Year$identifyCaseIssues
## No problems found.
## $Year$identifyNums
## No problems found.
##
## $Ratings
## $Ratings$identifyMissing
## No problems found.
## $Ratings$identifyOutliers
## Note that the following possible outlier values were detected: 3.1.
##
## $Genre
## $Genre$identifyMissing
## The following suspected missing value codes enter as regular values: 8, 9.
## $Genre$identifyWhitespace
## No problems found.
## $Genre$identifyLoners
## Note that the following levels have at most five observations: 4, 6, 7.
## $Genre$identifyCaseIssues
## No problems found.
## $Genre$identifyNums
## No problems found.
##
## $Gross
## $Gross$identifyMissing
## No problems found.
## $Gross$identifyOutliers
## Note that the following possible outlier values were detected: 6.43e+08.
##
## $Budget
## $Budget$identifyMissing
## No problems found.
## $Budget$identifyOutliers
## No problems found.
##
## $Screens
## $Screens$identifyMissing
## No problems found.
## $Screens$identifyOutliers
## Note that the following possible outlier values were detected: 4080, 4151, 4233, 4253, 4274, 4276, 4301, 4324.
##
## $Sequel
## $Sequel$identifyMissing
## No problems found.
## $Sequel$identifyOutliers
## Note that the following possible outlier values were detected: 2, 3, 4, 5, 6, 7.
##
## $Sentiment
## $Sentiment$identifyMissing
## No problems found.
## $Sentiment$identifyOutliers
## Note that the following possible outlier values were detected: -38, -17, -11, -9, -8, -6, -5, -4, -3, -2 (1 additional values omitted).
##
## $Views
## $Views$identifyMissing
## No problems found.
## $Views$identifyOutliers
## Note that the following possible outlier values were detected: 31859569, 32626778.
##
## $Likes
## $Likes$identifyMissing
## No problems found.
## $Likes$identifyOutliers
## Note that the following possible outlier values were detected: 187162, 370552.
##
## $Dislikes
## $Dislikes$identifyMissing
## No problems found.
## $Dislikes$identifyOutliers
## Note that the following possible outlier values were detected: 3439, 3524, 3565, 3812, 4245, 4382, 4752, 5746, 13960.
##
## $Comments
## $Comments$identifyMissing
## No problems found.
## $Comments$identifyOutliers
## Note that the following possible outlier values were detected: 18077, 24919, 38363.
##
## $`Aggregate Followers`
## $`Aggregate Followers`$identifyMissing
## No problems found.
## $`Aggregate Followers`$identifyOutliers
## No problems found.
##
## $Gross_log
## $Gross_log$identifyMissing
## No problems found.
## $Gross_log$identifyOutliers
## Note that the following possible outlier values were detected: 19.22, 19.27, 19.3, 19.32, 19.36, 19.37, 19.38, 19.6, 19.62, 19.64 (4 additional values omitted).
##
## $isSequel
## $isSequel$identifyMissing
## No problems found.
## $isSequel$identifyWhitespace
## No problems found.
## $isSequel$identifyLoners
## No problems found.
## $isSequel$identifyCaseIssues
## No problems found.
## $isSequel$identifyNums
## No problems found.
#{bjh} makeDataReport() does a lot of checks automatically and outputs a PDF report;
#I haven't had time to go through it in detail yet.
#makeDataReport(mdata_encoded,replace = TRUE)
#Let me try to remove some of the outliers DataMaid suggested and redo the plots
#Takes it from 231 observations to 205, probably need to scale this back a little bit
# Drop the rows flagged as outliers in the check output above. All conditions
# must hold for a row to be kept; filter() also drops rows where a condition
# evaluates to NA (e.g. films with a missing Screens value).
mdata_noOutliers <- filter(
  mdata,
  Movie != "Left Behind", # Ratings 3.1
  Movie != "Jurassic World", # GROSS
  Screens <= 4080, # SCREENS
  # Sentiment > 0, # SENTIMENT ALL NEGATIVES ARE OUTLIERS this is too aggressive it removes half the data
  Movie %!in% c("The Fault in Our Stars", "Fifty Shades of Grey"), # VIEWS ABOVE 31859569
  Movie %!in% c("The Fault in Our Stars", "Not Cool"), # LIKES ABOVE 187162
  Dislikes <= 3439, # DISLIKES
  Comments <= 18077 # COMMENTS
)
#PDF Output
#makeCodebook(mdata_noOutliers,replace = TRUE)
#makeDataReport(mdata_noOutliers,replace = TRUE)
#clean(mdata_noOutliers,replace = TRUE)
# Repeat the per-variable plots on the outlier-filtered data
# (visualize() presumably comes from dataMaid, attached above).
visualize(mdata_noOutliers,replace = TRUE)
# Re-run the per-variable checks to see which values are still flagged once
# the first round of outliers is gone; the result is recorded below.
check(mdata_noOutliers,replace = TRUE)
## $Movie
## $Movie$identifyMissing
## No problems found.
## $Movie$identifyWhitespace
## No problems found.
## $Movie$identifyLoners
## Note that the following levels have at most five observations: 13 Sins, 22 Jump Street, 3 Days to Kill, 300: Rise of an Empire, A Haunted House 2, A Million Ways to Die in the West, A Most Violent Year, A Walk Among the Tombstones, About Last Night, Aloha (195 additional values omitted).
## $Movie$identifyCaseIssues
## No problems found.
## $Movie$identifyNums
## No problems found.
##
## $Year
## $Year$identifyMissing
## No problems found.
## $Year$identifyOutliers
## No problems found.
##
## $Ratings
## $Ratings$identifyMissing
## No problems found.
## $Ratings$identifyOutliers
## Note that the following possible outlier values were detected: 4, 4.2, 4.3.
##
## $Genre
## $Genre$identifyMissing
## No problems found.
## $Genre$identifyOutliers
## No problems found.
##
## $Gross
## $Gross$identifyMissing
## No problems found.
## $Gross$identifyOutliers
## No problems found.
##
## $Budget
## $Budget$identifyMissing
## No problems found.
## $Budget$identifyOutliers
## No problems found.
##
## $Screens
## $Screens$identifyMissing
## No problems found.
## $Screens$identifyOutliers
## Note that the following possible outlier values were detected: 3775, 3777, 3826, 3845, 3856, 3875, 3934, 3936, 3938, 3946 (8 additional values omitted).
##
## $Sequel
## $Sequel$identifyMissing
## No problems found.
## $Sequel$identifyOutliers
## Note that the following possible outlier values were detected: 2, 3, 4, 5, 6, 7.
##
## $Sentiment
## $Sentiment$identifyMissing
## No problems found.
## $Sentiment$identifyOutliers
## Note that the following possible outlier values were detected: -38, -17, -11, -9, -8, -6, -5, -4, -3, -2 (1 additional values omitted).
##
## $Views
## $Views$identifyMissing
## No problems found.
## $Views$identifyOutliers
## No problems found.
##
## $Likes
## $Likes$identifyMissing
## No problems found.
## $Likes$identifyOutliers
## No problems found.
##
## $Dislikes
## $Dislikes$identifyMissing
## No problems found.
## $Dislikes$identifyOutliers
## Note that the following possible outlier values were detected: 2581, 2672, 3439.
##
## $Comments
## $Comments$identifyMissing
## No problems found.
## $Comments$identifyOutliers
## No problems found.
##
## $`Aggregate Followers`
## $`Aggregate Followers`$identifyMissing
## No problems found.
## $`Aggregate Followers`$identifyOutliers
## No problems found.
library("PerformanceAnalytics")
## Warning: package 'PerformanceAnalytics' was built under R version 3.5.2
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
# Keep only the numeric columns of the outlier-filtered data and draw the
# correlation chart matrix with histograms enabled.
a <- select_if(mdata_noOutliers, is.numeric)
chart.Correlation(
  a,
  histogram = TRUE,
  pch = 19
)
#Looks more normal but still long tails
# Re-plot the filtered data using the "basicVisual" plot type for all variables.
# NOTE(review): mutate_if(is.numeric, ~.) maps every numeric column to itself,
# so it leaves the data unchanged; possibly a placeholder for a transformation
# (e.g. a log) that was never filled in - confirm before removing.
mdata_noOutliers %>% mutate_if(is.numeric, ~.) %>% visualize(visuals = setVisuals(all = "basicVisual"))
#EDAreg
source("./Scripts/edaFunctions.R")
## Warning: package 'gplots' was built under R version 3.5.2
##
## Attaching package: 'gplots'
## The following object is masked from 'package:PerformanceAnalytics':
##
## textplot
## The following object is masked from 'package:stats':
##
## lowess
## corrplot 0.84 loaded
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
# Compare the distribution of the response on the raw scale ...
hist(mdata_encoded$Gross)
# ... and on the log scale (Gross_log = log(Gross)).
hist(mdata_encoded$Gross_log)
# Print the column types of the encoded data set.
str(mdata_encoded)
## Classes 'tbl_df', 'tbl' and 'data.frame': 231 obs. of 16 variables:
## $ Movie : chr "13 Sins" "22 Jump Street" "3 Days to Kill" "300: Rise of an Empire" ...
## $ Year : Factor w/ 2 levels "2014","2015": 1 1 1 1 1 1 1 1 1 1 ...
## $ Ratings : num 6.3 7.1 6.2 6.3 4.7 4.6 6.1 7.1 6.5 6.1 ...
## $ Genre : Factor w/ 11 levels "1","2","3","4",..: 7 1 1 1 7 3 7 1 9 7 ...
## $ Gross : num 9.13e+03 1.92e+08 3.07e+07 1.06e+08 1.73e+07 2.90e+04 4.26e+07 5.75e+06 2.60e+07 4.86e+07 ...
## $ Budget : num 4.00e+06 5.00e+07 2.80e+07 1.10e+08 3.50e+06 5.00e+05 4.00e+07 2.00e+07 2.80e+07 1.25e+07 ...
## $ Screens : num 45 3306 2872 3470 2310 ...
## $ Sequel : num 1 2 1 2 2 1 1 1 1 1 ...
## $ Sentiment : num 0 2 0 0 0 0 0 2 3 0 ...
## $ Views : num 3280543 583289 304861 452917 3145573 ...
## $ Likes : num 4632 3465 328 2429 12163 ...
## $ Dislikes : num 425 61 34 132 610 7 419 197 419 532 ...
## $ Comments : num 636 186 47 590 1082 ...
## $ Aggregate Followers: num 1120000 12350000 483000 568000 1923800 ...
## $ Gross_log : num 9.12 19.07 17.24 18.48 16.67 ...
## $ isSequel : Factor w/ 2 levels "FALSE","TRUE": 1 2 1 2 2 1 1 1 1 1 ...
#HISTOGRAMS OF NUMERIC
# The helpers called in this section are not defined in this file; presumably
# they come from ./Scripts/edaFunctions.R, sourced above - verify there.
# The "Removed 46 rows" warnings below show that rows with non-finite values
# are dropped from these plots.
histAllNumeric(mdata_encoded)
## Warning: Removed 46 rows containing non-finite values (stat_density).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 46 rows containing non-finite values (stat_bin).
smoothHistAllNumeric(mdata_encoded)
## Warning: Removed 46 rows containing non-finite values (stat_density).
# NOTE(review): the warnings below say "dendogram" is not a graphical
# parameter; this looks like a misspelling of "dendrogram" inside the helper,
# so that option is probably being ignored - confirm in the helper's source.
heatmapper(mdata_encoded)
## Warning in plot.window(...): "dendogram" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dendogram" is not a graphical parameter
## Warning in title(...): "dendogram" is not a graphical parameter
correlator(mdata_encoded)
# Distribution of the response across the categorical variables, on both the
# log scale and the raw scale.
boxplotCats(mdata_encoded,"Gross_log")
boxplotCats(mdata_encoded,"Gross")
violinPlotCats(mdata_encoded,"Gross_log")
violinPlotCats(mdata_encoded,"Gross")
# Baseline linear model for log gross revenue on all remaining columns.
#
# Two columns are excluded from the `.` expansion:
#   * Movie - a title column that becomes one dummy per film, which on its
#     own fits every observation exactly (0 residual degrees of freedom);
#   * Gross - the untransformed response, which leaks the target.
# With both included, every other coefficient is NA and no standard errors
# can be estimated.
#
# NOTE: the summary output recorded below in this file was produced by the
# earlier saturated fit (Gross_log ~ .) and needs to be regenerated.
model1 <- lm(Gross_log ~ . - Movie - Gross, data = mdata_encoded)
summary(model1)
##
## Call:
## lm(formula = Gross_log ~ ., data = mdata_encoded)
##
## Residuals:
## ALL 187 residuals are 0: no residual degrees of freedom!
##
## Coefficients: (22 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 9.1193 NA NA
## Movie22 Jump Street 9.9537 NA NA
## Movie3 Days to Kill 8.1205 NA NA
## Movie300: Rise of an Empire 9.3596 NA NA
## MovieA Haunted House 2 7.5469 NA NA
## MovieA Million Ways to Die in the West 8.4480 NA NA
## MovieA Most Violent Year 6.4454 NA NA
## MovieA Walk Among the Tombstones 7.9543 NA NA
## MovieAbout Last Night 8.5798 NA NA
## MovieAmerican Sniper 10.5541 NA NA
## MovieAmerican Ultra 7.0850 NA NA
## MovieAmericons 2.6482 NA NA
## MovieAnd So It Goes 7.4175 NA NA
## MovieAnnabelle 9.1306 NA NA
## MovieAnnie 9.1494 NA NA
## MovieAtlas Shrugged: Who Is John Galt? 4.5099 NA NA
## MovieAvengers: Age of Ultron 10.8231 NA NA
## MovieBarefoot 0.2565 NA NA
## MovieBetter Living Through Chemistry 2.0693 NA NA
## MovieBeyond the Lights 7.3772 NA NA
## MovieBig Hero 6 10.0989 NA NA
## MovieBlack or White 7.7689 NA NA
## MovieBlackhat 6.6563 NA NA
## MovieBlended 8.5313 NA NA
## MovieBoyhood 7.9309 NA NA
## MovieBrick Mansions 7.7068 NA NA
## MovieCake 5.3221 NA NA
## MovieCaptain America: The Winter Soldier 10.2569 NA NA
## MovieDawn of the Planet of the Apes 10.0385 NA NA
## MovieDeliver Us from Evil 8.1139 NA NA
## MovieDevil's Due 7.4562 NA NA
## MovieDivergent 9.7135 NA NA
## MovieDolphin Tale 2 8.4339 NA NA
## MovieDope 7.5176 NA NA
## MovieDraft Day 8.0566 NA NA
## MovieDumb and Dumber To 9.1529 NA NA
## MovieEarth to Echo 8.3572 NA NA
## MovieEdge of Tomorrow 9.3014 NA NA
## MovieEndless Love 7.8489 NA NA
## MovieEntourage 8.1743 NA NA
## MovieFantastic Four 8.6608 NA NA
## MovieFifty Shades of Grey 9.8082 NA NA
## MovieFoxcatcher 4.8868 NA NA
## MovieFurious 7 10.5541 NA NA
## MovieFury 9.1470 NA NA
## MovieGet Hard 9.2004 NA NA
## MovieGod Help the Girl 2.4134 NA NA
## MovieGod's Not Dead 8.8038 NA NA
## MovieGodzilla 9.9995 NA NA
## MovieGone Girl 9.8202 NA NA
## MovieGuardians of the Galaxy 10.5043 NA NA
## MovieHappy Christmas 1.1930 NA NA
## MovieHercules 8.9825 NA NA
## MovieHitman: Agent 47 7.4750 NA NA
## MovieHome 9.8723 NA NA
## MovieHorrible Bosses 2 8.6926 NA NA
## MovieHot Pursuit 8.2371 NA NA
## MovieHot Tub Time Machine 2 7.2058 NA NA
## MovieHow to Train Your Dragon 2 9.8723 NA NA
## MovieIf I Stay 8.6182 NA NA
## MovieIn the Name of My Daughter 3.4052 NA NA
## MovieInside Out 10.5397 NA NA
## MovieInsurgent 9.5637 NA NA
## MovieInterstellar 9.9326 NA NA
## MovieInto the Storm 8.5590 NA NA
## MovieInto the Woods 9.5482 NA NA
## MovieJack Ryan: Shadow Recruit 8.6182 NA NA
## MovieJersey Boys 8.5463 NA NA
## MovieJupiter Ascending 8.5548 NA NA
## MovieJurassic World 11.1623 NA NA
## MovieKill the Messenger 5.5923 NA NA
## MovieKingsman: The Secret Service 9.5482 NA NA
## MovieLeft Behind 7.3352 NA NA
## MovieLet's Be Cops 9.1078 NA NA
## MovieLocker 13 -1.3073 NA NA
## MovieMad Max: Fury Road 9.7266 NA NA
## MovieMaggie 2.6636 NA NA
## MovieMagic Mike XXL 8.8843 NA NA
## MovieMaleficent 10.1810 NA NA
## MovieMaps to the Stars 3.6406 NA NA
## MovieMax 8.4315 NA NA
## MovieMe and Earl and the Dying Girl 6.6042 NA NA
## MovieMillion Dollar Arm 8.2908 NA NA
## MovieMinions 10.4800 NA NA
## MovieMission: Impossible - Rogue Nation 9.8379 NA NA
## MovieMoms' Night Out 7.0380 NA NA
## MovieMortdecai 6.7257 NA NA
## MovieMr. Peabody & Sherman 9.4147 NA NA
## MovieMuppets Most Wanted 8.6319 NA NA
## MovieNeed for Speed 8.4712 NA NA
## MovieNeighbors 9.7068 NA NA
## MovieNight at the Museum: Secret of the Tomb 9.4324 NA NA
## MovieNo Good Deed 8.6570 NA NA
## MovieNoah 9.3113 NA NA
## MovieNon-Stop 9.2114 NA NA
## MovieOuija 8.6241 NA NA
## MoviePaddington 9.0282 NA NA
## MoviePaper Towns 8.1430 NA NA
## MovieParanormal Activity: The Marked Ones 8.1774 NA NA
## MoviePaul Blart: Mall Cop 2 8.9589 NA NA
## MoviePenguins of Madagascar 9.1186 NA NA
## MoviePitch Perfect 2 9.9057 NA NA
## MoviePixels 8.9589 NA NA
## MoviePlanes: Fire & Rescue 8.7771 NA NA
## MoviePoltergeist 8.5548 NA NA
## MoviePompeii 7.8403 NA NA
## MovieProject Almanac 7.8008 NA NA
## MovieRicki and the Flash 7.8532 NA NA
## MovieRide Along 9.5940 NA NA
## MovieRio 2 9.5790 NA NA
## MovieRoad Hard 2.4519 NA NA
## MovieRoboCop 8.7669 NA NA
## MovieSabotage 7.0476 NA NA
## MovieSan Andreas 9.7331 NA NA
## MovieSelma 8.6494 NA NA
## MovieSeventh Son 7.5411 NA NA
## MovieSex Tape 8.3468 NA NA
## MovieSin City: A Dame to Kill For 7.3209 NA NA
## MovieSinister 2 7.6406 NA NA
## MovieSon of God 8.7855 NA NA
## MovieSong One 0.7941 NA NA
## MovieSpy 9.3967 NA NA
## MovieSt. Vincent 8.4826 NA NA
## MovieStraight Outta Compton 9.6015 NA NA
## MovieTaken 3 9.1882 NA NA
## MovieTammy 9.1329 NA NA
## MovieTed 2 9.0943 NA NA
## MovieTeenage Mutant Ninja Turtles 9.9485 NA NA
## MovieTerminator Genisys 9.1893 NA NA
## MovieThat Awkward Moment 7.9543 NA NA
## MovieThe Age of Adaline 8.4457 NA NA
## MovieThe Amazing Spider-Man 2 10.0094 NA NA
## MovieThe Best of Me 7.9846 NA NA
## MovieThe Book of Life 8.6122 NA NA
## MovieThe Boxtrolls 8.6241 NA NA
## MovieThe Boy Next Door 8.2629 NA NA
## MovieThe DUFF 8.2226 NA NA
## MovieThe Equalizer 9.3212 NA NA
## MovieThe Expendables 3 8.3674 NA NA
## MovieThe Fault in Our Stars 9.5245 NA NA
## MovieThe Gambler 8.2107 NA NA
## MovieThe Gift 8.2825 NA NA
## MovieThe Giver 8.5051 NA NA
## MovieThe Good Lie 5.6968 NA NA
## MovieThe Gunman 7.0570 NA NA
## MovieThe Hobbit: The Battle of the Five Armies 10.2375 NA NA
## MovieThe Homesman 5.5841 NA NA
## MovieThe Hundred-Foot Journey 8.6889 NA NA
## MovieThe Hunger Games: Mockingjay - Part 1 10.5163 NA NA
## MovieThe Interview 6.5061 NA NA
## MovieThe Judge 8.5485 NA NA
## MovieThe Lazarus Effect 7.9466 NA NA
## MovieThe Legend of Hercules 7.6300 NA NA
## MovieThe Lego Movie 10.2491 NA NA
## MovieThe Loft 6.4846 NA NA
## MovieThe Longest Ride 8.3179 NA NA
## MovieThe Maze Runner 9.3212 NA NA
## MovieThe Monuments Men 9.0529 NA NA
## MovieThe November Man 7.9151 NA NA
## MovieThe One I Love 4.0268 NA NA
## MovieThe Other Woman 9.1258 NA NA
## MovieThe Purge: Anarchy 8.9659 NA NA
## MovieThe Pyramid 5.7078 NA NA
## MovieThe Rover 4.8005 NA NA
## MovieThe SpongeBob Movie: Sponge Out of Water 9.7838 NA NA
## MovieThe Theory of Everything 8.2769 NA NA
## MovieThe Vatican Tapes 5.2327 NA NA
## MovieThe Water Diviner 6.1289 NA NA
## MovieThe Wedding Ringer 8.8629 NA NA
## MovieThe Woman in Black 2: Angel of Death 7.9733 NA NA
## MovieThink Like a Man Too 8.8736 NA NA
## MovieTomorrowland 9.2309 NA NA
## MovieTrainwreck 9.3501 NA NA
## MovieTranscendence 7.8317 NA NA
## MovieTransformers: Age of Extinction 10.1974 NA NA
## MovieTusk 5.2950 NA NA
## MovieUnbroken 9.4498 NA NA
## MovieVacation 8.7017 NA NA
## MovieVeronica Mars 5.8962 NA NA
## MovieWhen the Game Stands Tall 8.1007 NA NA
## MovieWhiplash 7.2688 NA NA
## MovieWild 8.3311 NA NA
## MovieWinter's Tale 0.9019 NA NA
## MovieWish I Was Here 5.9743 NA NA
## MovieWoman in Gold 8.2017 NA NA
## MovieX-Men: Days of Future Past 10.1515 NA NA
## MovieYves Saint Laurent 4.3635 NA NA
## Year2015 NA NA NA
## Ratings NA NA NA
## Genre2 NA NA NA
## Genre3 NA NA NA
## Genre6 NA NA NA
## Genre7 NA NA NA
## Genre8 NA NA NA
## Genre9 NA NA NA
## Genre10 NA NA NA
## Genre12 NA NA NA
## Genre15 NA NA NA
## Gross NA NA NA
## Budget NA NA NA
## Screens NA NA NA
## Sequel NA NA NA
## Sentiment NA NA NA
## Views NA NA NA
## Likes NA NA NA
## Dislikes NA NA NA
## Comments NA NA NA
## `Aggregate Followers` NA NA NA
## isSequelTRUE NA NA NA
## Pr(>|t|)
## (Intercept) NA
## Movie22 Jump Street NA
## Movie3 Days to Kill NA
## Movie300: Rise of an Empire NA
## MovieA Haunted House 2 NA
## MovieA Million Ways to Die in the West NA
## MovieA Most Violent Year NA
## MovieA Walk Among the Tombstones NA
## MovieAbout Last Night NA
## MovieAmerican Sniper NA
## MovieAmerican Ultra NA
## MovieAmericons NA
## MovieAnd So It Goes NA
## MovieAnnabelle NA
## MovieAnnie NA
## MovieAtlas Shrugged: Who Is John Galt? NA
## MovieAvengers: Age of Ultron NA
## MovieBarefoot NA
## MovieBetter Living Through Chemistry NA
## MovieBeyond the Lights NA
## MovieBig Hero 6 NA
## MovieBlack or White NA
## MovieBlackhat NA
## MovieBlended NA
## MovieBoyhood NA
## MovieBrick Mansions NA
## MovieCake NA
## MovieCaptain America: The Winter Soldier NA
## MovieDawn of the Planet of the Apes NA
## MovieDeliver Us from Evil NA
## MovieDevil's Due NA
## MovieDivergent NA
## MovieDolphin Tale 2 NA
## MovieDope NA
## MovieDraft Day NA
## MovieDumb and Dumber To NA
## MovieEarth to Echo NA
## MovieEdge of Tomorrow NA
## MovieEndless Love NA
## MovieEntourage NA
## MovieFantastic Four NA
## MovieFifty Shades of Grey NA
## MovieFoxcatcher NA
## MovieFurious 7 NA
## MovieFury NA
## MovieGet Hard NA
## MovieGod Help the Girl NA
## MovieGod's Not Dead NA
## MovieGodzilla NA
## MovieGone Girl NA
## MovieGuardians of the Galaxy NA
## MovieHappy Christmas NA
## MovieHercules NA
## MovieHitman: Agent 47 NA
## MovieHome NA
## MovieHorrible Bosses 2 NA
## MovieHot Pursuit NA
## MovieHot Tub Time Machine 2 NA
## MovieHow to Train Your Dragon 2 NA
## MovieIf I Stay NA
## MovieIn the Name of My Daughter NA
## MovieInside Out NA
## MovieInsurgent NA
## MovieInterstellar NA
## MovieInto the Storm NA
## MovieInto the Woods NA
## MovieJack Ryan: Shadow Recruit NA
## MovieJersey Boys NA
## MovieJupiter Ascending NA
## MovieJurassic World NA
## MovieKill the Messenger NA
## MovieKingsman: The Secret Service NA
## MovieLeft Behind NA
## MovieLet's Be Cops NA
## MovieLocker 13 NA
## MovieMad Max: Fury Road NA
## MovieMaggie NA
## MovieMagic Mike XXL NA
## MovieMaleficent NA
## MovieMaps to the Stars NA
## MovieMax NA
## MovieMe and Earl and the Dying Girl NA
## MovieMillion Dollar Arm NA
## MovieMinions NA
## MovieMission: Impossible - Rogue Nation NA
## MovieMoms' Night Out NA
## MovieMortdecai NA
## MovieMr. Peabody & Sherman NA
## MovieMuppets Most Wanted NA
## MovieNeed for Speed NA
## MovieNeighbors NA
## MovieNight at the Museum: Secret of the Tomb NA
## MovieNo Good Deed NA
## MovieNoah NA
## MovieNon-Stop NA
## MovieOuija NA
## MoviePaddington NA
## MoviePaper Towns NA
## MovieParanormal Activity: The Marked Ones NA
## MoviePaul Blart: Mall Cop 2 NA
## MoviePenguins of Madagascar NA
## MoviePitch Perfect 2 NA
## MoviePixels NA
## MoviePlanes: Fire & Rescue NA
## MoviePoltergeist NA
## MoviePompeii NA
## MovieProject Almanac NA
## MovieRicki and the Flash NA
## MovieRide Along NA
## MovieRio 2 NA
## MovieRoad Hard NA
## MovieRoboCop NA
## MovieSabotage NA
## MovieSan Andreas NA
## MovieSelma NA
## MovieSeventh Son NA
## MovieSex Tape NA
## MovieSin City: A Dame to Kill For NA
## MovieSinister 2 NA
## MovieSon of God NA
## MovieSong One NA
## MovieSpy NA
## MovieSt. Vincent NA
## MovieStraight Outta Compton NA
## MovieTaken 3 NA
## MovieTammy NA
## MovieTed 2 NA
## MovieTeenage Mutant Ninja Turtles NA
## MovieTerminator Genisys NA
## MovieThat Awkward Moment NA
## MovieThe Age of Adaline NA
## MovieThe Amazing Spider-Man 2 NA
## MovieThe Best of Me NA
## MovieThe Book of Life NA
## MovieThe Boxtrolls NA
## MovieThe Boy Next Door NA
## MovieThe DUFF NA
## MovieThe Equalizer NA
## MovieThe Expendables 3 NA
## MovieThe Fault in Our Stars NA
## MovieThe Gambler NA
## MovieThe Gift NA
## MovieThe Giver NA
## MovieThe Good Lie NA
## MovieThe Gunman NA
## MovieThe Hobbit: The Battle of the Five Armies NA
## MovieThe Homesman NA
## MovieThe Hundred-Foot Journey NA
## MovieThe Hunger Games: Mockingjay - Part 1 NA
## MovieThe Interview NA
## MovieThe Judge NA
## MovieThe Lazarus Effect NA
## MovieThe Legend of Hercules NA
## MovieThe Lego Movie NA
## MovieThe Loft NA
## MovieThe Longest Ride NA
## MovieThe Maze Runner NA
## MovieThe Monuments Men NA
## MovieThe November Man NA
## MovieThe One I Love NA
## MovieThe Other Woman NA
## MovieThe Purge: Anarchy NA
## MovieThe Pyramid NA
## MovieThe Rover NA
## MovieThe SpongeBob Movie: Sponge Out of Water NA
## MovieThe Theory of Everything NA
## MovieThe Vatican Tapes NA
## MovieThe Water Diviner NA
## MovieThe Wedding Ringer NA
## MovieThe Woman in Black 2: Angel of Death NA
## MovieThink Like a Man Too NA
## MovieTomorrowland NA
## MovieTrainwreck NA
## MovieTranscendence NA
## MovieTransformers: Age of Extinction NA
## MovieTusk NA
## MovieUnbroken NA
## MovieVacation NA
## MovieVeronica Mars NA
## MovieWhen the Game Stands Tall NA
## MovieWhiplash NA
## MovieWild NA
## MovieWinter's Tale NA
## MovieWish I Was Here NA
## MovieWoman in Gold NA
## MovieX-Men: Days of Future Past NA
## MovieYves Saint Laurent NA
## Year2015 NA
## Ratings NA
## Genre2 NA
## Genre3 NA
## Genre6 NA
## Genre7 NA
## Genre8 NA
## Genre9 NA
## Genre10 NA
## Genre12 NA
## Genre15 NA
## Gross NA
## Budget NA
## Screens NA
## Sequel NA
## Sentiment NA
## Views NA
## Likes NA
## Dislikes NA
## Comments NA
## `Aggregate Followers` NA
## isSequelTRUE NA
##
## Residual standard error: NaN on 0 degrees of freedom
## (44 observations deleted due to missingness)
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 186 and 0 DF, p-value: NA
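# Exploring predictor/response relationships with base R graphics.
# plot_vs_response() is generic: pass any data frame plus the names of the
# response and predictor columns (it was adapted from an mtcars example,
# e.g. train$income ~ train[[x]] for your own data).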
# Scatter-plot one column against another and overlay a loess smoother.
#
# Args:
#   df:             data frame (or tibble) holding both columns.
#   responseVar:    name of the column drawn on the y axis (string).
#   independentVar: name of the column drawn on the x axis (string).
#
# Returns: the value of lines(); the function is called for its plot.
plot_vs_response <- function(df, responseVar, independentVar) {
  response <- df[[responseVar]]
  predictor <- df[[independentVar]]
  plot(response ~ predictor, xlab = independentVar, ylab = responseVar)

  # loess() drops incomplete rows, so its fitted values are shorter than the
  # raw columns whenever data are missing. Ordering the raw predictor and
  # using that index on the fitted values then pairs each x with the wrong
  # fit. Restrict both to the same usable rows before fitting and ordering.
  usable <- is.finite(response) & is.finite(predictor)
  y <- response[usable]
  x <- predictor[usable]

  smooth_fit <- loess(y ~ x)
  by_x <- order(x)
  lines(x[by_x], smooth_fit$fitted[by_x], col = "red", lwd = 3)
}
# One scatter plot with a smoother per numeric column, each against Gross.
for (var in names(select_if(mdata_encoded, is.numeric))) {
  plot_vs_response(
    df = mdata_encoded,
    responseVar = "Gross",
    independentVar = var
  )
}
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at 0.97
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 0.0009
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.97
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1